###############################################################################
# NICE DSU Technical Support Document 18: Methods for population-adjusted 
# indirect comparisons in submissions to NICE.
#
# This code accompanies Appendix D, and provides a worked example of MAIC and 
# STC population adjustment methods. Simulated datasets are used here for 
# exposition, but the methods employed are general and should be readily 
# applicable in a range of situations.
#
#
#   Phillippo, D.M., Ades, A.E., Dias, S., Palmer, S., Abrams, K.R., 
#   Welton, N.J. NICE DSU Technical Support Document 18: Methods for
#   population-adjusted indirect comparisons in submission to NICE. 2016. 
#   Available from http://www.nicedsu.org.uk
#
###############################################################################


### Initial setup
if(!require(dplyr)) {install.packages("dplyr"); library(dplyr)}
if(!require(tidyr)) {install.packages("tidyr"); library(tidyr)}
if(!require(wakefield)) {install.packages("wakefield"); library(wakefield)}
if(!require(ggplot2)) {install.packages("ggplot2"); library(ggplot2)}
if(!require(sandwich)) {install.packages("sandwich"); library(sandwich)}

set.seed(61374988)

# Study characteristics
N_AB <- 500
N_AC <- 300
agerange_AB <- 45:75
agerange_AC <- 45:55
femalepc_AB <- 0.64
femalepc_AC <- 0.8

# Outcome model
b_0 <- 0.85
b_gender <- 0.12
b_age <- 0.05
b_age_trt <- -0.08
b_trt_B <- -2.1
b_trt_C <- -2.5


### Generate AB trial
AB.IPD <- 
  rbind(
    
  # Generate A arm
  r_data_frame(n = N_AB/2,     # Number of individuals in arm A
               id,             # Unique ID
               age = age(x = agerange_AB),   # Generate ages
               gender = gender(prob = c(1 - femalepc_AB, femalepc_AB)), # Generate genders
               trt = "A"       # Assign treatment A
               ),
  
  # Generate B arm
  r_data_frame(n = N_AB/2,     # Number of individuals in arm B
               id,             # Unique ID
               age = age(x = agerange_AB),   # Generate ages
               gender = gender(prob = c(1 - femalepc_AB, femalepc_AB)), # Generate genders
               trt = "B"       # Assign treatment B
               )
  ) %>%
  
  # Generate outcomes using logistic model
  mutate(
    yprob = 1 / (1 + exp(-(
      b_0 + b_gender * (gender == "Male") + b_age * (age - 40) + 
        if_else(trt == "B", b_trt_B + b_age_trt * (age - 40), 0)
    ))),
    y = rbinom(N_AB, 1, yprob)
  ) %>%
  select(-yprob)   # Drop the yprob column

# Tabulate
AB.IPD %>% group_by(trt) %>%
  summarise(n(), mean(age), sd(age), `n(male)`=sum(gender=="Male"), 
            `%(male)`=mean(gender=="Male"), sum(y), mean(y))


### Generate AC trial
AC.IPD <- 
  rbind(
    
  # Generate A arm
  r_data_frame(n = N_AC/2,     # Number of individuals in arm A
               id,             # Unique ID
               age = age(x = agerange_AC),   # Generate ages
               gender = gender(prob = c(1 - femalepc_AC, femalepc_AC)), # Generate genders
               trt = "A"       # Assign treatment A
               ),
  
  # Generate C arm
  r_data_frame(n = N_AC/2,     # Number of individuals in arm C
               id,             # Unique ID
               age = age(x = agerange_AC),   # Generate ages
               gender = gender(prob = c(1 - femalepc_AC, femalepc_AC)), # Generate genders
               trt = "C"       # Assign treatment C
               )
  ) %>%
  
  # Generate outcomes using logistic model
  mutate(
    yprob = 1 / (1 + exp(-(
      b_0 + b_gender * (gender == "Male") + b_age * (age - 40) + 
        if_else(trt == "C", b_trt_C + b_age_trt * (age - 40), 0)
    ))),
    y = rbinom(N_AC, 1, yprob)
  ) %>%
  select(-yprob)   # Drop the yprob column

# Tabulate
AC.IPD %>% group_by(trt) %>%
  summarise(n(), mean(age), sd(age), `n(male)`=sum(gender=="Male"), 
            `%(male)`=mean(gender=="Male"), sum(y), mean(y))

# Create aggregate data
AC.AgD <- 
  cbind(
    # Trial level stats: mean and sd of age, number and proportion of males
    summarise(AC.IPD, age.mean = mean(age), age.sd = sd(age), 
              N.male = sum(gender=="Male"), prop.male = mean(gender=="Male")),
    
    # Summary outcomes for A arm
    filter(AC.IPD, trt == "A") %>% 
      summarise(y.A.sum = sum(y), y.A.bar = mean(y), N.A = n()),
    
    # Summary outcomes for C arm
    filter(AC.IPD, trt == "C") %>% 
      summarise(y.C.sum = sum(y), y.C.bar = mean(y), N.C = n())
  )

AC.AgD

### MAIC
# Objective function
objfn <- function(a1, X){
  sum(exp(X %*% a1))
}

# Gradient function
gradfn <- function(a1, X){
  colSums(sweep(X, 1, exp(X %*% a1), "*"))
}

# Centred EMs
X.EM.0 <- sweep(with(AB.IPD, cbind(age, age^2)), 2, 
                with(AC.AgD, c(age.mean, age.mean^2 + age.sd^2)), '-')

# Estimate weights
print(opt1 <- optim(par = c(0,0), fn = objfn, gr = gradfn, X = X.EM.0, method = "BFGS"))
a1 <- opt1$par

wt <- exp(X.EM.0 %*% a1)

wt.rs <- (wt / sum(wt)) * N_AB   # rescaled weights

# Summary of weights, histogram
summary(wt.rs)
qplot(wt.rs, geom="histogram", 
      xlab = "Rescaled weight (multiple of original unit weight)", 
      binwidth=0.25)

# Effective sample size
sum(wt)^2/sum(wt^2)

# Check balance
AB.IPD %>% 
  mutate(wt) %>% 
  summarise(age.mean = weighted.mean(age, wt), 
            age.sd = sqrt(sum(wt / sum(wt) * (age - age.mean)^2))
            )

AC.AgD[, c("age.mean", "age.sd")]

## Create the weighting estimator using a simple linear model, use sandwich
## estimator for standard error.

# Binomial GLM
fit1 <-
  AB.IPD %>% mutate(y0 = 1 - y, wt = wt) %>%
  glm(cbind(y,y0) ~ trt, data = ., family = binomial, weights = wt)

# Sandwich estimator of variance matrix
V.sw <- vcovHC(fit1)

# The log OR of B vs. A is just the trtB parameter estimate, 
# since effect modifiers were centred
print(d.AB.MAIC <- coef(fit1)["trtB"])
print(var.d.AB.MAIC <- V.sw["trtB","trtB"])

# Estimated log OR of C vs. A from the AC trial
d.AC <- with(AC.AgD, log(y.C.sum * (N.A - y.A.sum) / (y.A.sum * (N.C - y.C.sum))))
var.d.AC <- with(AC.AgD, 1/y.A.sum + 1/(N.A - y.A.sum) + 1/y.C.sum + 1/(N.C - y.C.sum))

# Indirect comparison of C vs. B in AC trial
print(d.BC.MAIC <- d.AC - d.AB.MAIC)
print(var.d.BC.MAIC <- var.d.AC + var.d.AB.MAIC)


### STC

AB.IPD$y0 <- 1 - AB.IPD$y  # Add in dummy non-event column

# Fit binomial GLM
STC.GLM <- glm(cbind(y,y0) ~ trt*I(age - AC.AgD$age.mean), 
               data = AB.IPD, family = binomial)
summary(STC.GLM)

# Try adding prognostic variables to improve model fit
add1(STC.GLM, ~.+gender, test="Chisq")

# Estimated log OR of B vs. A in the AC trial
print(d.AB.STC <- coef(STC.GLM)["trtB"])
print(var.d.AB.STC <- vcov(STC.GLM)["trtB","trtB"])

# Indirect comparison of C vs. B in AC trial
print(d.BC.STC <- d.AC - d.AB.STC)
print(var.d.BC.STC <- var.d.AC + var.d.AB.STC)


### Summary
# True B vs. A effect in the AC population
d.AB.TRUE <- b_trt_B + b_age_trt * (AC.AgD$age.mean - 40)

# Naive approach
AB.IPD %>% group_by(trt) %>%
summarise(y.sum = sum(y)) %>%
spread(trt, y.sum) %>%
  with({
    d.AB.AB <<- log(B * (N_AB/2 - A) / (A * (N_AB/2 - B)))
    var.d.AB.AB <<- 1/B + 1/(N_AB/2 - A) + 1/A + 1/(N_AB/2 - B)
  })

# True C vs. B effect in AC trial
d.BC.TRUE <- b_trt_C - b_trt_B

# Naive approach
d.BC.NAIVE <- d.AC - d.AB.AB
var.d.BC.NAIVE <- var.d.AC + var.d.AB.AB

# Forest plot of results
plotdat <- data_frame(
  id = 1:10,
  Comparison = factor(c(rep(1,4), 2, 2, rep(3,4)),
                      labels = c("B vs. A", "C vs. A", "C vs. B")),
  Estimate = c(d.AB.TRUE, d.AB.MAIC, d.AB.STC, d.AB.AB,
               b_trt_C + b_age_trt * (AC.AgD$age.mean - 40), d.AC, 
               d.BC.TRUE, d.BC.MAIC, d.BC.STC, d.BC.NAIVE),
  var = c(NA, var.d.AB.MAIC, var.d.AB.STC, var.d.AB.AB,
          NA, var.d.AC, 
          NA, var.d.BC.MAIC, var.d.BC.STC, var.d.BC.NAIVE),
  lo = Estimate + qnorm(0.025) * sqrt(var),
  hi = Estimate + qnorm(0.975) * sqrt(var),
  type = c("True", "MAIC", "STC", "Unadjusted",
           "True","Unadjusted",
           "True", "MAIC", "STC", "Unadjusted")
)

ggplot(aes(x = Estimate, y = id, col = type, shape = type), data = plotdat) +
  geom_vline(xintercept = 0, lty = 2) +
  geom_point(size = 2) +
  geom_segment(aes(y = id, yend = id, x = lo, xend = hi), na.rm = TRUE) +
  xlab("Estimate (Log OR)") +
  facet_grid(Comparison~., switch = "y", scales = "free_y", space = "free_y") +
  scale_y_reverse(name = "Comparison in AC population", breaks = NULL, expand = c(0, 0.6))
